* do file to import and clean house price data from Vic gov: https://www.propertyandlandtitles.vic.gov.au/property-information/property-prices


* Input:
* "OrigData/HousePrices/Quarterly stats 2000 - 2007.xlsx",
* "OrigData/HousePrices/Quarterly stats 2008 - 2011.xlsx"
* "OrigData/HousePrices/Quarterly stats 2012 onwards.xlsx"
* Data/locality_postcodematch.dta

* Output:
* Data/quarterlyhouse_1.dta
* Data/quarterlyhouse_2.dta
* Data/quarterlyhouse_3.dta
* Data/quarterly_houseunitpricedata.dta
* Data/postcodequarter_house_unit_prices.dta



 *****************************
 **** IMPORT HOUSE PRICE DATA ****
 *****************************
 
* 2000 to 2007

	set more off 

	clear

import excel "OrigData/HousePrices/Quarterly stats 2000 - 2007.xlsx", cellrange(A2:J99969) firstrow

	rename Suburb locality
	rename TotalSales housesales 
	rename MedianPrice houseprice

	rename F unitsales
	rename G unitprice

	rename I vacantsales // vacant land
	rename J vacantprice 


	drop E H

save Data/quarterlyhouse_1.dta, replace


* 2008 to 2011
	clear
import excel "OrigData/HousePrices/Quarterly stats 2008 - 2011.xlsx", cellrange(A2:J49986) firstrow

	rename Suburb locality
	rename TotalSales housesales 
	rename MedianPrice houseprice

	rename F unitsales
	rename G unitprice

	rename I vacantsales // vacant land
	rename J vacantprice 


	drop E H

save Data/quarterlyhouse_2.dta, replace


* 2011 to 2015
	clear			
import excel "OrigData/HousePrices/Quarterly stats 2012 onwards.xlsx", cellrange(A2:J53109) firstrow

	rename Suburb locality
	rename TotalSales housesales 
	rename MedianPrice houseprice

	rename F unitsales
	rename G unitprice

	rename I vacantsales // vacant land
	rename J vacantprice 


	drop E H

save Data/quarterlyhouse_3.dta, replace

	append using Data/quarterlyhouse_2.dta
	append using Data/quarterlyhouse_1.dta

	split Quarter

	drop Quarter2

	destring Quarter1, ignore("st" "nd" "rd" "th" ) replace
	destring Quarter3, replace

	gen contract_quarter = yq(Quarter3,Quarter1)
	format contract_quarter %tq

	drop Quarter*

	replace locality = upper(locality)


	joinby locality using Data/postcode_locality.dta

save Data/quarterly_houseunitpricedata.dta, replace


	collapse (median) *price* (sum) *sales*, by(postcode contract_quarter)
	
* rescale 	

	replace houseprice = houseprice/1000
	replace unitprice = unitprice/1000
	replace vacantprice = vacantprice/1000
	
	
** outliers	
	replace unitprice = . if unitprice > 2000 // outliers median price > $2mill
	replace houseprice = . if houseprice > 2000 
	
save Data/postcodequarter_house_unit_prices.dta, replace
	



